24 August 2018

Data?

Datasets, dimensions and file formats

  • Tabular data:
    • e.g. synonyms of scientific names, species traits, field data
    • .csv, .xlsx, .json, .sql
  • Spatial data (2D):
    • e.g. soil maps, land use maps, plot locations, country borders
    • .shp, .tif, .asc, .jpg, .img, .png
  • Spatio-temporal data (3D):
    • e.g. climate data, species distribution data
    • .nc

Finding data

1. Tabular data

Tabular data

df1
##                 Name      Type Height LitterQuality
## 1 Fraxinus excelsior Deciduous     25             8
## 2    Fagus sylvatica Deciduous     30             3
## 3        Picea abies Evergreen     22             1
## 4    Fagus sylvatica Deciduous     12             3
## 5      Larix decidua Deciduous     25             1

Joining tabular data

  • You need a link!
    • timestamps
    • plot IDs
    • species names

Joining data in R

df1
##                 Name      Type Height LitterQuality
## 1 Fraxinus excelsior Deciduous     25             8
## 2    Fagus sylvatica Deciduous     30             3
## 3        Picea abies Evergreen     22             1
## 4    Fagus sylvatica Deciduous     12             3
## 5      Larix decidua Deciduous     25             1
df2
##              Name  LeafType
## 1     Picea abies    Needle
## 2 Fagus sylvatica Broadleaf
## 3   Quercus robur Broadleaf

Joining data in R

merge(df1,df2,by="Name") #all=TRUE, all.x=TRUE, all.y=TRUE
##              Name      Type Height LitterQuality  LeafType
## 1 Fagus sylvatica Deciduous     30             3 Broadleaf
## 2 Fagus sylvatica Deciduous     12             3 Broadleaf
## 3     Picea abies Evergreen     22             1    Needle
library(plyr)
join(df1,df2,by='Name') #type='left','right','inner' or 'full'
##                 Name      Type Height LitterQuality  LeafType
## 1 Fraxinus excelsior Deciduous     25             8      <NA>
## 2    Fagus sylvatica Deciduous     30             3 Broadleaf
## 3        Picea abies Evergreen     22             1    Needle
## 4    Fagus sylvatica Deciduous     12             3 Broadleaf
## 5      Larix decidua Deciduous     25             1      <NA>

Joining data in R

Joining data in R

join(df1,df2,by='Name',type="right")
##              Name      Type Height LitterQuality  LeafType
## 1     Picea abies Evergreen     22             1    Needle
## 2 Fagus sylvatica Deciduous     30             3 Broadleaf
## 3 Fagus sylvatica Deciduous     12             3 Broadleaf
## 4   Quercus robur      <NA>     NA            NA Broadleaf
join(df1,df2,by='Name',type="full")
##                 Name      Type Height LitterQuality  LeafType
## 1 Fraxinus excelsior Deciduous     25             8      <NA>
## 2    Fagus sylvatica Deciduous     30             3 Broadleaf
## 3        Picea abies Evergreen     22             1    Needle
## 4    Fagus sylvatica Deciduous     12             3 Broadleaf
## 5      Larix decidua Deciduous     25             1      <NA>
## 6      Quercus robur      <NA>     NA            NA Broadleaf

2. Spatial data

Spatial data

Spatial data in R

SpatialData
##           [,1]      [,2]      [,3]      [,4]      [,5]
## [1,] 0.4715742 0.4539041 0.3993645 0.9399316 0.1523114
## [2,] 0.1941211 0.7774937 0.4231381 0.2552547 0.6033563
## [3,] 0.3003553 0.9666672 0.9545557 0.1442665 0.7898651
## [4,] 0.4632861 0.2584902 0.5941901 0.4478228 0.2735380
## [5,] 0.4625379 0.1017277 0.0712397 0.9750144 0.6075700
SpatialData[2,2]
## [1] 0.7774937

Spatial data in R

SpatialData[latitude,longitude]
SpatialData[53.03°,3.75°]

From coordinates to matrix indices

The raster package

library(raster)
## Loading required package: sp
SpatialData <- raster(SpatialData)
extent(SpatialData) <- c(0,4,50,55) #(xmin,xmax,ymin,ymax)
projection(SpatialData) <- crs("+proj=longlat +datum=WGS84")
print(SpatialData)
## class       : RasterLayer 
## dimensions  : 5, 5, 25  (nrow, ncol, ncell)
## resolution  : 0.8, 1  (x, y)
## extent      : 0, 4, 50, 55  (xmin, xmax, ymin, ymax)
## coord. ref. : +proj=longlat +datum=WGS84 +ellps=WGS84 +towgs84=0,0,0 
## data source : in memory
## names       : layer 
## values      : 0.0712397, 0.9750144  (min, max)

Extracting values

coordinates <- data.frame(xcoords = c(1,2,5),ycoords = c(51,51,52))
coordinates
##   xcoords ycoords
## 1       1      51
## 2       2      51
## 3       5      52
extract(SpatialData,coordinates)
## [1] 0.1017277 0.0712397        NA

Exercise 2.1

Extract the data from the matrix below at the spatial location with coordinates 51°N, 2°W

# matrix
SpatialData=matrix(seq(25),nrow=5,ncol=5,byrow =TRUE)
# coordinates of the centre of the upperleft cell: 53°N,1°W
origin <- c(53,1) 
# resolution or width of each cell in degrees
resolution <- 2 

3. Spatio-temporal data

Spatio-temporal data

Climate data example

Climate data example

Handling netCDF files

# load netcdf libraries
library(ncdf4)
# open NetCDF file
MeanTemp <- nc_open("./tg_0.50deg_reg_v17.0.nc")
# check properties of the dataset
print(MeanTemp)
...
##      1 variables (excluding dimension variables):
##         short tg[longitude,latitude,time]   
##             long_name: mean temperature
##             units: Celsius
##             standard_name: air_temperature
##             _FillValue: -9999
##             scale_factor: 0.00999999977648258
...

Handling netCDF files

# check properties of the dataset
print(MeanTemp)
...
##      3 dimensions:
##         longitude  Size:232
##             long_name: Longitude values
##             units: degrees_east
##             standard_name: longitude
##         latitude  Size:101
##             long_name: Latitude values
##             units: degrees_north
##             standard_name: latitude
##         time  Size:24837
##             long_name: Time in days
##             units: days since 1950-01-01 00:00
##             standard_name: time
...

Handling netCDF files

# retrieve data on one variable
# >> ncvar_get(ncdf_file,variablename)

# examples
head(ncvar_get(MeanTemp,'latitude'))
## [1] 25.25 25.75 26.25 26.75 27.25 27.75
head(ncvar_get(MeanTemp,'longitude'))
## [1] -40.25 -39.75 -39.25 -38.75 -38.25 -37.75
head(ncvar_get(MeanTemp,'time'))
## [1] 0 1 2 3 4 5

Extracting data with raster package

library(raster)
tg <- brick("./tg_0.50deg_reg_v17.0.nc")
coordinates <- data.frame(x=c(3.75),y=c(51.03))
data <- extract(tg,coordinates)
data[,1:4]
## X1950.01.01 X1950.01.02 X1950.01.03 X1950.01.04 
##        0.47        4.89        5.90        6.22

Looking for a specific date

start_date <- as.Date("01 01 1950",format="%d %m %Y")
print(start_date)
## [1] "1950-01-01"
date <- as.Date("02 10 2008",format="%d %m %Y")
print(date)
## [1] "2008-10-02"
numberofdays <- difftime(date,start_date,units="days")
colnames(data)[numberofdays+1]
## [1] "X2008.10.02"

Exercise 3.1

Write two functions that return the mean temperature for a given location: one for a specified date and one for a specified period

# Exercise skeleton: return the mean temperature at one location on one date.
#
# Arguments:
#   latitude, longitude - coordinates of the location
#                         (presumably decimal degrees - confirm)
#   date                - the day of interest (format is left to the implementer)
#
# Returns: `temperature_value`, which this skeleton does not define yet;
# the body below has to be filled in so that it is computed before returning.
DayTemp <- function (latitude,longitude,date){
  
  return (temperature_value)
}

# Exercise skeleton: build a data frame of temperatures at one location
# over a period.
#
# Arguments:
#   name_of_location      - label for the location
#   latitude, longitude   - coordinates of the location
#                           (presumably decimal degrees - confirm)
#   start_date, end_date  - bounds of the period (format is left to the
#                           implementer)
#
# Returns: a data frame with the columns Name, Date, Year and Temperature.
# The `...` entries are placeholders that still have to be replaced with
# the actual column values.
TempData <- function (name_of_location,latitude,longitude,
                      start_date,end_date){
  
  df <- data.frame(Name = ..., Date = ..., Year = ..., 
                   Temperature = ...)
  return (df)
}

Exercise 3.2

Add mean annual temperature data to the understorey biomass dataset, using 1995-2000 data only

biomass_data <- read.csv('./Biomass.csv')
head (biomass_data)
##   RegionID PlotID Latitude Longitude   Biomass
## 1       BI BI2388 52.73212  23.83462  53.70710
## 2       BI BI2393 52.73049  23.83799 113.12266
## 3       BI BI2592 52.73231  23.84431  72.80202
## 4       BI BI2606 52.73247  23.83131  75.99874
## 5       BI BI2960 52.76170  23.86648  79.90570
## 6       BI BI6447 52.65456  23.65184 127.03926